#front matter
rm(list=ls())
library(foreign)
library(car)
library(lme4)
library(texreg)
library(devtools)
#install_github("malecki/mrp", sub="mrpdata")
#install_github("malecki/mrp", sub="mrp")
library(mrp)
library(mrpdata)

#set working directory
#setwd('c:/Dropbox/Files/Research/Democratic_Deficit_Salient_Issues')
setwd('/Volumes/MONOGAN/immigration/demDef/cces/FullModule')
#setwd('C:/temp')
#setwd('Dropbox/demDef')


#read data
cces <- read.dta('CCES14_UGA_OUTPUT_subset.dta',convert.factors=F)

#clean data
cces$bilingual<-recode(cces$UGA323, '1=0;2=1;3=NA;4=NA') #17% liberal
cces$tuition<-recode(cces$UGA324, '1=1;2=0;9=NA;98=NA') #63% liberal
cces$license<-recode(cces$UGA325, '1=1;2=0;9=NA;98=NA') #29% liberal
cces$everify<-recode(cces$UGA326, '1=0;2=1;9=NA;98=NA') #13% liberal
cces$marj<-recode(cces$UGA327, '1=1;2=0;9=NA;98=NA') #75% liberal
cces$schip<-recode(cces$UGA328, '1=1;2=0;9=NA;98=NA') #54% liberal
cces$euth<-recode(cces$UGA329, '1=1;2=0;9=NA;98=NA') #63% liberal
cces$black<-as.numeric(cces$race==2)
cces$hisp<-as.numeric(cces$race==3)
cces$educ<-recode(cces$educ,'1=1;2=2;3:4=3;5:6=4')
cces$female<-as.numeric(cces$gender==2)
cces$race.female<-1 #white men
cces$race.female[cces$female==0 & cces$black==1]<-2 #black men
cces$race.female[cces$female==0 & cces$hisp==1]<-3 #hispanic men
cces$race.female[cces$female==1 & cces$hisp==0 & cces$black==0]<-4 #white women
cces$race.female[cces$female==1 & cces$black==1]<-5 #black women
cces$race.female[cces$female==1 & cces$hisp==1]<-6 #hispanic women
cces$age.0<-2014-cces$birthyr
cces$age<-recode(cces$age.0,'lo:29=1;30:44=2;45:64=3;65:hi=4')
cces$age.educ<-cces$age*cces$educ
cces$region.0 <- cces$region
cces$region <- NA
cces$region[cces$region.0==1] <- "northeast"
cces$region[cces$region.0==2] <- "midwest"
cces$region[cces$region.0==3] <- "south"
cces$region[cces$region.0==4] <- "west"

cces$statecode <- NA
cces$statecode[cces$StateAbbr=="AK"] <- 1
cces$statecode[cces$StateAbbr=="AL"] <- 2
cces$statecode[cces$StateAbbr=="AR"] <- 3
cces$statecode[cces$StateAbbr=="AZ"] <- 4
cces$statecode[cces$StateAbbr=="CA"] <- 5
cces$statecode[cces$StateAbbr=="CO"] <- 6
cces$statecode[cces$StateAbbr=="CT"] <- 7
cces$statecode[cces$StateAbbr=="DC"] <- 8
cces$statecode[cces$StateAbbr=="DE"] <- 9
cces$statecode[cces$StateAbbr=="FL"] <- 10
cces$statecode[cces$StateAbbr=="GA"] <- 11
cces$statecode[cces$StateAbbr=="HI"] <- 12
cces$statecode[cces$StateAbbr=="IA"] <- 13
cces$statecode[cces$StateAbbr=="ID"] <- 14
cces$statecode[cces$StateAbbr=="IL"] <- 15
cces$statecode[cces$StateAbbr=="IN"] <- 16
cces$statecode[cces$StateAbbr=="KS"] <- 17
cces$statecode[cces$StateAbbr=="KY"] <- 18
cces$statecode[cces$StateAbbr=="LA"] <- 19
cces$statecode[cces$StateAbbr=="MA"] <- 20
cces$statecode[cces$StateAbbr=="MD"] <- 21
cces$statecode[cces$StateAbbr=="ME"] <- 22
cces$statecode[cces$StateAbbr=="MI"] <- 23
cces$statecode[cces$StateAbbr=="MN"] <- 24
cces$statecode[cces$StateAbbr=="MO"] <- 25
cces$statecode[cces$StateAbbr=="MS"] <- 26
cces$statecode[cces$StateAbbr=="MT"] <- 27
cces$statecode[cces$StateAbbr=="NC"] <- 28
cces$statecode[cces$StateAbbr=="ND"] <- 29
cces$statecode[cces$StateAbbr=="NE"] <- 30
cces$statecode[cces$StateAbbr=="NH"] <- 31
cces$statecode[cces$StateAbbr=="NJ"] <- 32
cces$statecode[cces$StateAbbr=="NM"] <- 33
cces$statecode[cces$StateAbbr=="NV"] <- 34
cces$statecode[cces$StateAbbr=="NY"] <- 35
cces$statecode[cces$StateAbbr=="OH"] <- 36
cces$statecode[cces$StateAbbr=="OK"] <- 37
cces$statecode[cces$StateAbbr=="OR"] <- 38
cces$statecode[cces$StateAbbr=="PA"] <- 39
cces$statecode[cces$StateAbbr=="RI"] <- 40
cces$statecode[cces$StateAbbr=="SC"] <- 41
cces$statecode[cces$StateAbbr=="SD"] <- 42
cces$statecode[cces$StateAbbr=="TN"] <- 43
cces$statecode[cces$StateAbbr=="TX"] <- 44
cces$statecode[cces$StateAbbr=="UT"] <- 45
cces$statecode[cces$StateAbbr=="VA"] <- 46
cces$statecode[cces$StateAbbr=="VT"] <- 47
cces$statecode[cces$StateAbbr=="WA"] <- 48
cces$statecode[cces$StateAbbr=="WI"] <- 49
cces$statecode[cces$StateAbbr=="WV"] <- 50
cces$statecode[cces$StateAbbr=="WY"] <- 51

#load state-level data
Statelevel <- read.dta("state_level_update.dta",convert.underscore=TRUE)

#merge state Obama 2012 vote share into CCES data
Statelevel <- Statelevel[order(Statelevel$sstate.initnum),]
Statelevel.sub <- subset(Statelevel,select=c(sstate,obamavote2012))
cces <- merge(x=cces,y=Statelevel.sub,by.x='StateAbbr',by.y='sstate',all.x=T)

#merge state percentage Evangelical/Mormon into CCES data
Statelevel.subrelig <- subset(Statelevel,select=c(sstate,p.evanmorm))
cces <- merge(x=cces,y=Statelevel.subrelig,by.x='StateAbbr',by.y='sstate',all.x=T)

#load census data
#Census <- read.dta("ACS2013_poststratification.dta",convert.underscore=TRUE)
Census <- read.dta("ACS2014_poststratification.dta",convert.underscore=TRUE)
Census <- Census[order(Census$cstate),]
Census$cstate.initnum <- match(Census$cstate, Statelevel$sstate)

#create index variables for poststratification
Census$crace.female <- (Census$cfemale * 3) + Census$crace.WBH

#merge Obama 2012 vote share into Census data
Census<-merge(x=Census,y=Statelevel.sub,by.x='cstate',by.y='sstate',all.x=T)
Census<-merge(x=Census,y=Statelevel.subrelig,by.x='cstate',by.y='sstate',all.x=T)

# ESTIMATE MODELS:

#1. bilingual model
bilingual.mod <- glmer(bilingual~obamavote2012+p.evanmorm+(1|race.female)+(1|age)+(1|educ)+(1|statecode)+(1|region),family=binomial(link="logit"),data=cces,
	control=glmerControl(optimizer="bobyqa", optCtrl = list(maxfun = 100000000))); summary(bilingual.mod)

#store state random effects
state.ranefs <- rep(NA,51)
names(state.ranefs) <- Statelevel$sstate
for (i in 1:51){
   state.ranefs[i] <- ranef(bilingual.mod)$statecode[paste(i),1]
}
state.ranefs[is.na(state.ranefs)] <- 0 #give 0 random effects to non-represented states

#estimate predictions for demographic types
cellpred <- invlogit(fixef(bilingual.mod)["(Intercept)"] +
		fixef(bilingual.mod)["obamavote2012"]*Census$obamavote2012 +
		fixef(bilingual.mod)["p.evanmorm"]*Census$p.evanmorm +
		ranef(bilingual.mod)$race.female[Census$crace.female,1] +
		ranef(bilingual.mod)$age[Census$cage.cat,1] +
		ranef(bilingual.mod)$educ[Census$cedu.cat,1] +
		state.ranefs[Census$cstate] +
		ranef(bilingual.mod)$region[Census$cregion,1]
		)


#weight predictions by actual population frequency
cellpredweighted <- cellpred * Census$cpercent.state
statepred <- 100 * as.vector(tapply(cellpredweighted,Census$cstate,sum))
names(statepred) <- Statelevel$sstate
bilingual.statepred <- statepred

p.1<-as.numeric(predict(bilingual.mod, type="response")>0.5)
mean(p.1==cces$bilingual[!is.na(cces$bilingual)])
table(p.1,cces$bilingual[!is.na(cces$bilingual)])

#2. tuition model
tuition.mod <- glmer(tuition~obamavote2012+p.evanmorm+(1|race.female)+(1|age)+(1|educ)+(1|statecode)+(1|region),family=binomial(link="logit"),data=cces,
	control=glmerControl(optimizer="Nelder_Mead", optCtrl = list(maxfun = 100000000))); summary(tuition.mod)

#store state random effects
state.ranefs <- rep(NA,51)
names(state.ranefs) <- Statelevel$sstate
for (i in 1:51){
   state.ranefs[i] <- ranef(tuition.mod)$statecode[paste(i),1]
}
state.ranefs[is.na(state.ranefs)] <- 0 #give 0 random effects to non-represented states

#estimate predictions for demographic types
cellpred <- invlogit(fixef(tuition.mod)["(Intercept)"] +
		fixef(tuition.mod)["obamavote2012"]*Census$obamavote2012 +
		fixef(tuition.mod)["p.evanmorm"]*Census$p.evanmorm +
		ranef(tuition.mod)$race.female[Census$crace.female,1] +
		ranef(tuition.mod)$age[Census$cage.cat,1] +
		ranef(tuition.mod)$educ[Census$cedu.cat,1] +
		state.ranefs[Census$cstate] +
		ranef(tuition.mod)$region[Census$cregion,1]
		)

#weight predictions by actual population frequency
cellpredweighted <- cellpred * Census$cpercent.state
statepred <- 100 * as.vector(tapply(cellpredweighted,Census$cstate,sum))
names(statepred) <- Statelevel$sstate
tuition.statepred <- statepred

p.2<-as.numeric(predict(tuition.mod, type="response")>0.5)
mean(p.2==cces$tuition[!is.na(cces$tuition)])
table(p.2,cces$tuition[!is.na(cces$tuition)])

#3. license model
license.mod <- glmer(license~obamavote2012+p.evanmorm+(1|race.female)+(1|age)+(1|educ)+(1|statecode)+(1|region),family=binomial(link="logit"),data=cces,
	control=glmerControl(optimizer="bobyqa", optCtrl = list(maxfun = 100000000))); summary(license.mod)

#store state random effects
state.ranefs <- rep(NA,51)
names(state.ranefs) <- Statelevel$sstate
for (i in 1:51){
   state.ranefs[i] <- ranef(license.mod)$statecode[paste(i),1]
}
state.ranefs[is.na(state.ranefs)] <- 0 #give 0 random effects to non-represented states

#estimate predictions for demographic types
cellpred <- invlogit(fixef(license.mod)["(Intercept)"] +
		fixef(license.mod)["obamavote2012"]*Census$obamavote2012 +
		fixef(license.mod)["p.evanmorm"]*Census$p.evanmorm +
		ranef(license.mod)$race.female[Census$crace.female,1] +
		ranef(license.mod)$age[Census$cage.cat,1] +
		ranef(license.mod)$educ[Census$cedu.cat,1] +
		state.ranefs[Census$cstate] +
		ranef(license.mod)$region[Census$cregion,1]
		)

#weight predictions by actual population frequency
cellpredweighted <- cellpred * Census$cpercent.state
statepred <- 100 * as.vector(tapply(cellpredweighted,Census$cstate,sum))
names(statepred) <- Statelevel$sstate
license.statepred <- statepred

p.3<-as.numeric(predict(license.mod, type="response")>0.5)
mean(p.3==cces$license[!is.na(cces$license)])
table(p.3,cces$license[!is.na(cces$license)])

#4. everify model
everify.mod <- glmer(everify~obamavote2012+p.evanmorm+(1|race.female)+(1|age)+(1|educ)+(1|statecode)+(1|region),family=binomial(link="logit"),data=cces,
	control=glmerControl(optimizer="Nelder_Mead", optCtrl = list(maxfun = 100000000))); summary(everify.mod)

#store state random effects
state.ranefs <- rep(NA,51)
names(state.ranefs) <- Statelevel$sstate
for (i in 1:51){
   state.ranefs[i] <- ranef(everify.mod)$statecode[paste(i),1]
}
state.ranefs[is.na(state.ranefs)] <- 0 #give 0 random effects to non-represented states

#estimate predictions for demographic types
cellpred <- invlogit(fixef(everify.mod)["(Intercept)"] +
		fixef(everify.mod)["obamavote2012"]*Census$obamavote2012 +
		fixef(everify.mod)["p.evanmorm"]*Census$p.evanmorm +
		ranef(everify.mod)$race.female[Census$crace.female,1] +
		ranef(everify.mod)$age[Census$cage.cat,1] +
		ranef(everify.mod)$educ[Census$cedu.cat,1] +
		state.ranefs[Census$cstate] +
		ranef(everify.mod)$region[Census$cregion,1]
		)

#weight predictions by actual population frequency
cellpredweighted <- cellpred * Census$cpercent.state
statepred <- 100 * as.vector(tapply(cellpredweighted,Census$cstate,sum))
names(statepred) <- Statelevel$sstate
everify.statepred <- statepred

p.4<-as.numeric(predict(everify.mod, type="response")>0.5)
mean(p.4==cces$everify[!is.na(cces$everify)])
table(p.4,cces$everify[!is.na(cces$everify)])

#5. marj model
marj.mod <- glmer(marj~obamavote2012+p.evanmorm+(1|race.female)+(1|age)+(1|educ)+(1|statecode)+(1|region),family=binomial(link="logit"),data=cces,
	control=glmerControl(optimizer="Nelder_Mead", optCtrl = list(maxfun = 1000000000))); summary(marj.mod)

#store state random effects
state.ranefs <- rep(NA,51)
names(state.ranefs) <- Statelevel$sstate
for (i in 1:51){
   state.ranefs[i] <- ranef(marj.mod)$statecode[paste(i),1]
}
state.ranefs[is.na(state.ranefs)] <- 0 #give 0 random effects to non-represented states

#estimate predictions for demographic types
cellpred <- invlogit(fixef(marj.mod)["(Intercept)"] +
		fixef(marj.mod)["obamavote2012"]*Census$obamavote2012 +
		fixef(marj.mod)["p.evanmorm"]*Census$p.evanmorm +
		ranef(marj.mod)$race.female[Census$crace.female,1] +
		ranef(marj.mod)$age[Census$cage.cat,1] +
		ranef(marj.mod)$educ[Census$cedu.cat,1] +
		state.ranefs[Census$cstate] +
		ranef(marj.mod)$region[Census$cregion,1]
		)

#weight predictions by actual population frequency
cellpredweighted <- cellpred * Census$cpercent.state
statepred <- 100 * as.vector(tapply(cellpredweighted,Census$cstate,sum))
names(statepred) <- Statelevel$sstate
marj.statepred <- statepred

p.5<-as.numeric(predict(marj.mod, type="response")>0.5)
mean(p.5==cces$marj[!is.na(cces$marj)])
table(p.5,cces$marj[!is.na(cces$marj)])

#6. schip model
schip.mod <- glmer(schip~obamavote2012+p.evanmorm+(1|race.female)+(1|age)+(1|educ)+(1|statecode)+(1|region),family=binomial(link="logit"),data=cces,
	control=glmerControl(optimizer="bobyqa", optCtrl = list(maxfun = 100000000))); summary(schip.mod)

#store state random effects
state.ranefs <- rep(NA,51)
names(state.ranefs) <- Statelevel$sstate
for (i in 1:51){
   state.ranefs[i] <- ranef(schip.mod)$statecode[paste(i),1]
}
state.ranefs[is.na(state.ranefs)] <- 0 #give 0 random effects to non-represented states

#estimate predictions for demographic types
cellpred <- invlogit(fixef(schip.mod)["(Intercept)"] +
		fixef(schip.mod)["obamavote2012"]*Census$obamavote2012 +
		fixef(schip.mod)["p.evanmorm"]*Census$p.evanmorm +
		ranef(schip.mod)$race.female[Census$crace.female,1] +
		ranef(schip.mod)$age[Census$cage.cat,1] +
		ranef(schip.mod)$educ[Census$cedu.cat,1] +
		state.ranefs[Census$cstate] +
		ranef(schip.mod)$region[Census$cregion,1]
		)

#weight predictions by actual population frequency
cellpredweighted <- cellpred * Census$cpercent.state
statepred <- 100 * as.vector(tapply(cellpredweighted,Census$cstate,sum))
names(statepred) <- Statelevel$sstate
schip.statepred <- statepred

p.6<-as.numeric(predict(schip.mod, type="response")>0.5)
mean(p.6==cces$schip[!is.na(cces$schip)])
table(p.6,cces$schip[!is.na(cces$schip)])

#7. euth model
euth.mod <- glmer(euth~obamavote2012+p.evanmorm+(1|race.female)+(1|age)+(1|educ)+(1|statecode)+(1|region),family=binomial(link="logit"),data=cces,
	control=glmerControl(optimizer="Nelder_Mead", optCtrl = list(maxfun = 100000000))); summary(euth.mod)

#store state random effects
state.ranefs <- rep(NA,51)
names(state.ranefs) <- Statelevel$sstate
for (i in 1:51){
   state.ranefs[i] <- ranef(euth.mod)$statecode[paste(i),1]
}
state.ranefs[is.na(state.ranefs)] <- 0 #give 0 random effects to non-represented states

#estimate predictions for demographic types
cellpred <- invlogit(fixef(euth.mod)["(Intercept)"] +
		fixef(euth.mod)["obamavote2012"]*Census$obamavote2012 +
		fixef(euth.mod)["p.evanmorm"]*Census$p.evanmorm +
		ranef(euth.mod)$race.female[Census$crace.female,1] +
		ranef(euth.mod)$age[Census$cage.cat,1] +
		ranef(euth.mod)$educ[Census$cedu.cat,1] +
		state.ranefs[Census$cstate] +
		ranef(euth.mod)$region[Census$cregion,1]
		)

#weight predictions by actual population frequency
cellpredweighted <- cellpred * Census$cpercent.state
statepred <- 100 * as.vector(tapply(cellpredweighted,Census$cstate,sum))
names(statepred) <- Statelevel$sstate
euth.statepred <- statepred

p.7<-as.numeric(predict(euth.mod, type="response")>0.5)
mean(p.7==cces$euth[!is.na(cces$euth)])
table(p.7,cces$euth[!is.na(cces$euth)])

#output to LaTeX
texreg(list(bilingual.mod,license.mod,tuition.mod,everify.mod,marj.mod,schip.mod,euth.mod),stars=c(.05),label="mrpModels",caption.above=T,caption="\\textbf{Multilevel Logistic Regression Models of Whether the Respondent Prefers the Liberal Position on an Issue}",dcolumn=T,custom.coef.names=c('Intercept','Obama 2012',"State religious conservatism"),custom.gof.names=c('AIC','BIC','Log Likelihood','Num.~obs.','Num.~states','Race/female groups','Regions','Education groups','Age groups','Variance: state intercepts','Variance: race-sex intercepts','Variance: region intercepts','Variance: education intercepts','Variance: age intercepts'))
#state estimates
state.estimates <- cbind(bilingual.statepred,tuition.statepred,license.statepred,everify.statepred,marj.statepred,schip.statepred,euth.statepred)
write.csv(state.estimates, "MRP_state_estimates.csv")
